Examine Lna pulldown data

Organize single cell libraries

First designate the libraries and the cells that were subsampled.

cells <- list(
  mkcell_pulldown = c("TGCGCAGCAGGTCGTC",
              "ACTTGTTAGGACCACA",
              "CCATTCGTCCCTGACT",
              "TGTCCCAGTAAACACA"))

libs <- c(
  "kirkpatrick",
  "mkcell_pulldown")

bc_metadat <- read_tsv(file.path(data_dir, 
                         "kirkpatrick", 
                         "fastq",
                         "original", 
                         "barcodes_from_10x_run.txt"),
                         col_names = c("barcode_10x", "cell_id"))

## original library to compare against
reflib <- "kirkpatrick"
subsampled_libs <- "mkcell_pulldown"

Load and organize a table for each library of read counts per cell per gene, and a table of umi counts per cell per gene.

## read in featurecount table with read counts per gene per cell
read_dat <- map(libs, 
                ~read_tsv(file.path(data_dir, 
                          .x,
                          "featurecounts",
                          "assigned_read_counts.txt"),
                          skip = 1))
names(read_dat) <- libs

## read in umi_tools count table with umi counts per gene per cell
umi_dat <- map(libs, 
                ~read_tsv(file.path(data_dir, 
                          .x,
                          "dgematrix",
                          "dge_matrix.txt")))
names(umi_dat) <- libs

## format read table to match umi table
read_dat <- map(read_dat, function(x){
                       x <- select(x, gene = Geneid, 7:ncol(x))
                       cols <- colnames(x)
                       gene_col <- cols[1]
                       cell_cols <- cols[2:length(cols)]
                       new_cell_cols <- str_split(cell_cols, ":", simplify = T)
                       new_cell_cols <- new_cell_cols[, 2]
                       new_cell_cols <- str_split(new_cell_cols, "-", simplify = T)
                       new_cell_cols <- new_cell_cols[, 1]
                       colnames(x) <- c(gene_col, new_cell_cols)
                       x
                     })

## add in all missing gene features to dge_matrix and make colnames and rows match (for easy comparison)
umi_dat <- map2(umi_dat, read_dat, 
          function(x, y){
            genes_to_add <- y$gene[!y$gene %in% x$gene]
            ycols <- ncol(y)
            xcols <- ncol(x)
            if(xcols != ycols){
              stop("check ncols")
            }
            x <- x[, colnames(y)]
            zeros <- matrix(rep(0L, length(genes_to_add) * (xcols - 1)),
                            nrow = length(genes_to_add))
            zero_df <- as_data_frame(zeros) %>% 
              mutate(gene = genes_to_add) %>% 
              dplyr::select(gene, everything())
            colnames(zero_df) <- colnames(x)
            x <- bind_rows(x, zero_df)
            x <- x[rownames(y), ]
            if(!all(rownames(x) == rownames(y))){
              stop("check new rownames")
            }
            x
          })

cell_obj_mdata <- map(cells, 
                      ~mutate(bc_metadat, 
                              subsampled = ifelse(cell_id %in% .x,
                                                  TRUE,
                                                  FALSE)))

Next organize these tables into simple classes called subsampled-sets to keep track of each experiment’s relavant raw, processed, and meta data.

#' simple class to hold info for each experiment
create_sc_obj <- function(umi_df,
                          read_df,
                          cell_mdata_df){
  x <- list()
  class(x) <- "subsampled-set"
  x$umis <- umi_df
  x$reads <- read_df
  x$meta_dat <- cell_mdata_df
  return(x)
}

sc_objs <- list(umi_dat, read_dat, cell_obj_mdata)
sc_objs <- pmap(sc_objs, create_sc_obj)

rm(umi_dat)
rm(read_dat)

Next perform basic processing. 1) generate separate objects to store sparse matrices of umi and read counts. 2) normalize read and umi count data by total library size (sum of all read or umi counts for all cells in the experiment) and report as Reads per million or UMIs per million. 3) Compute per cell metrics (read and umi counts, sequencing saturation)

tidy_to_matrix <- function(df){
   df <- as.data.frame(df)
   rownames(df) <- df[, 1]
   df[, 1] <- NULL
   mat <- as.matrix(df)
   mat <- as(mat, "sparseMatrix")   
   return(mat)
}

#' keep both tidy and matrix objs
generate_matrices <- function(sc_obj){
  sc_obj$umi_matrix <- tidy_to_matrix(sc_obj$umis)
  sc_obj$read_matrix <- tidy_to_matrix(sc_obj$reads)
  sc_obj
}

#' normalize by library size (Reads per Million)
norm_libsize <- function(sc_obj){
  sc_obj$norm_umi <- 1e6 * sweep(sc_obj$umi_matrix, 2, 
                                 sum(as.vector(sc_obj$umi_matrix)), "/")
  sc_obj$norm_reads <- 1e6 * sweep(sc_obj$read_matrix, 2, 
                                   sum(as.vector(sc_obj$read_matrix)), "/")
  sc_obj
}

add_metadata <- function(sc_obj, dat){
  if (is.vector(dat)){
    new_colname <- deparse(substitute(dat))
    df <- data_frame(!!new_colname := dat)
    df[[new_colname]] <- dat
    df[["cell_id"]] <- names(dat)
    sc_obj$meta_dat <- left_join(sc_obj$meta_dat,
                                 df,
                                 by = "cell_id")
    
  } else if (is.data.frame(dat)) {
    sc_obj$meta_dat <- left_join(sc_obj$meta_dat,
                                 dat,
                                 by = "cell_id")
  }
  sc_obj
}

compute_summaries <- function(sc_obj){
  ## raw counts
  total_umis <- colSums(sc_obj$umi_matrix)
  names(total_umis) <- colnames(sc_obj$umi_matrix)
  total_reads <- colSums(sc_obj$read_matrix)
  names(total_reads) <- colnames(sc_obj$read_matrix)
  
  ## norm counts
  norm_total_umis <- colSums(sc_obj$norm_umi)
  names(norm_total_umis) <- colnames(sc_obj$norm_umi)
  norm_total_reads <- colSums(sc_obj$norm_reads)
  names(norm_total_reads) <- colnames(sc_obj$norm_reads)
    
  sc_obj <- add_metadata(sc_obj, total_umis)
  sc_obj <- add_metadata(sc_obj, total_reads)
  sc_obj <- add_metadata(sc_obj, norm_total_umis)
  sc_obj <- add_metadata(sc_obj, norm_total_reads)
  
  ## compute cDNA duplication rate 
  sc_obj$meta_dat$cDNA_duplication <- 1 - (sc_obj$meta_dat$total_umis /
                                             sc_obj$meta_dat$total_reads)
  
  sc_obj
}

sc_objs <- map(sc_objs, generate_matrices)
sc_objs <- map(sc_objs, norm_libsize)
sc_objs <- map(sc_objs, compute_summaries)

Compute enrichment of reads/umis over the original library.

sc_objs <- map(sc_objs,
    function(sub_dat){
      og_counts <- select(sc_objs[[reflib]]$meta_dat,
                          og_total_reads = total_reads,
                          og_total_umis = total_umis,
                          og_norm_total_umis = norm_total_umis,
                          og_norm_total_reads = norm_total_reads,
                          og_cDNA_duplication = cDNA_duplication,
                          cell_id)
      sub_dat$meta_dat <- left_join(sub_dat$meta_dat,
                         og_counts, 
                         by = "cell_id")
      
      sub_dat$meta_dat <- mutate(sub_dat$meta_dat,
                                 read_proportion = log2( total_reads / og_total_reads),
                                 umi_proportion = log2( total_umis / og_total_umis),
                                 norm_read_proportion = log2( norm_total_reads /
                                                                og_norm_total_reads),
                                 norm_umi_proportion = log2( norm_total_umis /
                                                               og_norm_total_umis))
      sub_dat
    })

plot cDNA duplication rate

sc_metadat <- map(sc_objs, ~.x$meta_dat) %>% 
  bind_rows(.id = "library") 

ggplot(sc_metadat, aes(total_reads, cDNA_duplication)) +
  geom_point(aes(color = subsampled)) +
  labs(x = expression(paste("Read count (log"[10],")")),
       y = "Sequencing saturation") + 
  scale_x_log10() +
  scale_color_manual(values = color_palette) +
  facet_wrap(~library)
Note the high level of sequencing saturation (0 = no-duplication, 1 = all duplicates) in the original library. Also note that the libraries tend to have higher saturatioin rates, after subsampling.

Note the high level of sequencing saturation (0 = no-duplication, 1 = all duplicates) in the original library. Also note that the libraries tend to have higher saturatioin rates, after subsampling.

plot read and umi counts per library

global_plot_theme <- theme(
        legend.position = "top",
        legend.text = element_text(size = 10),
        strip.text = element_text(size = 8))

subsampled_metadat <- sc_metadat[sc_metadat$library != reflib, ] %>% 
  mutate(library = factor(library, 
                          levels = c(subsampled_libs)))

unnorm_plt <- ggplot(subsampled_metadat, 
                     aes(og_total_reads / 3, total_reads / 3, colour = subsampled)) + 
  geom_point(size = 0.5) + 
  geom_abline(slope = 1) +
  facet_wrap(~library, nrow = 1) + 
#  coord_fixed() +
  xlab("original library\nreads count (Thousands)") +
  ylab("subsampled library\nreads count (Thousands)") +
 # ggtitle("Raw reads associated with each cell barcode") +
  scale_colour_manual(name = "subsampled:",
                      values = color_palette) +
  theme_cowplot(font_size = 10, line_size = .5) +
  theme(aspect.ratio = 1) + 
  global_plot_theme

norm_plt <- ggplot(subsampled_metadat, aes(og_norm_total_reads / 1e3, 
                                           norm_total_reads / 1e3, 
                                           colour = subsampled)) + 
  geom_point(size = 0.5) + 
  geom_abline(slope = 1) + 
  facet_wrap(~library, nrow = 1) + 
  xlab("original library\nRPM (Thousands)") +
  ylab("subsampled library\nRPM (Thousands)") +
  scale_colour_manual(name = "subsampled:",
                      values = color_palette) +
  theme_cowplot(font_size = 10, line_size = 0.5) +
  theme(aspect.ratio = 1) + 
  global_plot_theme

unnorm_umi_plt <- ggplot(subsampled_metadat, 
                         aes(og_total_umis / 1e3, total_umis / 1e3, colour = subsampled)) + 
  geom_point(size = 0.5) + 
  geom_abline(slope = 1) +
 # coord_fixed() +
  facet_wrap(~library, nrow = 1) + 
  xlab("original library\nUMI count (Thousands)") +
  ylab("subsampled library\nUMI count (Thousands)") +
  scale_colour_manual(name = "subsampled:",
                      values = color_palette) +
  theme_cowplot(font_size = 10, line_size = .5) +
    theme(aspect.ratio = 1) + 
  global_plot_theme

norm_umi_plt <- ggplot(subsampled_metadat, 
                       aes(og_norm_total_umis / 1e3, norm_total_umis / 1e3, colour = subsampled)) + 
  geom_point(size = 0.5) + 
  geom_abline(slope = 1) + 
  facet_wrap(~library, nrow = 1) + 
  xlab("original library\nUMI normalized RPM (Thousands)") +
  ylab("subsampled library\nUMIs per Million (Thousands)") +
  scale_colour_manual(name = "subsampled:",
                      values = color_palette) +
  theme_cowplot(font_size = 10, line_size = 0.5) +
  theme(aspect.ratio = 1) + 
  global_plot_theme

plt <- plot_grid(unnorm_plt, norm_plt, unnorm_umi_plt, norm_umi_plt, 
                 labels = "AUTO",
                 align = 'hv')
plt

save_plot("reads_per_barcode_scatterplots.pdf", plt, nrow = 2, ncol = 2, base_width = 8 )

Plot enrichment of reads/umis

read <- ggplot(subsampled_metadat, 
       aes(cell_id, norm_read_proportion, colour = subsampled)) + 
  geom_point() + 
  labs(x = "Cell", 
       y = expression(paste( " Log"[2], " normalized reads ", frac("subsampled", "original")))) +
  scale_colour_manual(name = "subsampled:", values = color_palette) +
  facet_wrap(~library, nrow = 1) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(axis.text.x = element_blank(),
        legend.position = "top",
        legend.text = element_text(size = 12))

umi <- ggplot(subsampled_metadat, 
       aes(cell_id, norm_umi_proportion, colour = subsampled)) + 
  geom_point() + 
  labs(x = "Cell", 
       y = expression(paste( "Log"[2], " normalized UMIs ", frac("subsampled", "original")))) +
  scale_colour_manual(name = "subsampled:", values = color_palette) +
  facet_wrap(~library, nrow = 1) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(axis.text.x = element_blank(),
        legend.position = "top",
        legend.text = element_text(size = 12))

plt <- plot_grid(read, umi,
                 labels = "AUTO",
                 align = 'hv')
plt

ggsave("reads_umi_ratio_per_barcode_normalized.pdf", width = 8, height = 5)

umi <- ggplot(filter(subsampled_metadat, library %in% subsampled_libs), 
       aes(cell_id, norm_umi_proportion, colour = subsampled)) + 
  geom_point() + 
  labs(x = "Cell", 
       y = expression(paste( "Log"[2], " normalized UMIs ", frac("subsampled", "original")))) +
  scale_colour_manual(name = "subsampled:", values = color_palette) +
  facet_wrap(~library, nrow = 1) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(axis.text.x = element_blank(),
        legend.position = "top",
        legend.text = element_text(size = 12))

ggsave("umi_ratio_MA_normalized.pdf", umi, width =5, height = 5)



umi <- ggplot(filter(subsampled_metadat, library %in% subsampled_libs), 
       aes(og_total_umis, umi_proportion, colour = subsampled)) + 
  geom_point() + 
  labs(x = "Total UMIs per barcode", 
       y = expression(paste( "Log"[2], " UMIs ", frac("subsampled", "original")))) +
  scale_colour_manual(name = "subsampled:", values = color_palette) +
  facet_wrap(~library, nrow = 1) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(legend.position = "top",
        legend.text = element_text(size = 12))

ggsave("umi_ratio_MA.pdf", umi, width =5, height = 5)
ggplot(subsampled_metadat, aes(subsampled, 
                read_proportion, fill = subsampled)) + 
  geom_boxplot() + 
  facet_wrap(~library, nrow = 1) +
  xlab("Selected for Reamplification") +
  ylab(expression(paste( "Log"[2], " Reads ", frac("subsampled", "original")))) +
  scale_fill_manual(name = "subsampled:",
                    values = color_palette) +
  theme_cowplot(font_size = 16, line_size = 0.5) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 12)
  )

ggsave("reads_ratio_per_barcode_boxplot.pdf", width = 6, height = 5)

ggplot(subsampled_metadat, aes(subsampled, 
                umi_proportion, fill = subsampled)) + 
  geom_boxplot() + 
  facet_wrap(~library, nrow = 1) +
  xlab("Selected for Reamplification") +
  ylab(expression(paste( "Log"[2], " UMIs ", frac("subsampled", "original")))) +
  scale_fill_manual(name = "subsampled:",
                    values = color_palette) +
  theme_cowplot(font_size = 16, line_size = 0.5) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 12)
  )

ggsave("umi_ratio_per_barcode_boxplot.pdf", width = 6, height = 5)

ggplot(subsampled_metadat, aes(subsampled, 
                norm_read_proportion, fill = subsampled)) + 
  geom_boxplot() + 
  facet_wrap(~library, nrow = 1) +
  xlab("Selected for Reamplification") +
  ylab(expression(paste( "Log"[2], " normalized Reads ", frac("subsampled", "original")))) +
  scale_fill_manual(name = "subsampled:",
                    values = color_palette) +
  theme_cowplot(font_size = 16, line_size = 0.5) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 12)
  )

ggsave("norm_reads_ratio_per_barcode_boxplot.pdf", width = 6, height = 5)

ggplot(subsampled_metadat, aes(subsampled, 
                norm_umi_proportion, fill = subsampled)) + 
  geom_boxplot() + 
  facet_wrap(~library, nrow = 1) +
  xlab("Selected for Reamplification") +
  ylab(expression(paste( "Log"[2], " normalized UMIs ", frac("subsampled", "original")))) +
  scale_fill_manual(name = "subsampled:",
                    values = color_palette) +
  theme_cowplot(font_size = 16, line_size = 0.5) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 12)
  )

ggsave("norm_umi_ratio_per_barcode_boxplot.pdf", width = 6, height = 5)
dat <- group_by(subsampled_metadat, library) %>%
  filter(library != reflib) %>% 
  mutate(total_new = sum(total_reads, na.rm = T), 
         total_old = sum(og_total_reads, na.rm = T))

dat_group <- group_by(dat, library, subsampled) %>% 
  summarize(total_new = sum(total_reads, 
                            na.rm = T) / unique(total_new), 
            total_old = sum(og_total_reads, 
                            na.rm = T) / unique(total_old)) %>% 
  gather(lib, percent_lib, -library, -subsampled ) %>%
  mutate(lib = factor(lib, levels = c("total_old", "total_new"), 
                      labels = c("original\nlibrary", "subsampled\nlibrary")))

ggplot(dat_group, aes(lib, percent_lib, fill = subsampled)) + 
  geom_bar(stat = "identity") + 
  ylab("Fraction of\n Reads Assigned") +
  scale_fill_manual(name = "subsampled:",
                    values = color_palette) +
  facet_wrap(~library) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5),
    legend.position = "top",
    legend.text = element_text(size = 16)
  )

ggsave("proportion_reads_all_barcode_barplot.pdf", width = 7, height = 7)

dat_group %>% 
 rename(Method = library) %>% 
  spread(lib, percent_lib) %>% 
  mutate(`Targeted Library Read Fold-Enrichment` = `subsampled\nlibrary` / `original\nlibrary`) %>%
  filter(subsampled == T) %>% 
  select(Method, `Targeted Library Read Fold-Enrichment`)

Genes detected

## compute per gene or per gene/umi combo enrichment
detected_molecules <- function(sc_obj, molecule = "gene"){
  umis <- sc_obj$umi_matrix
  if (molecule == "gene"){
    n_genes <- colSums(umis > 0)
    out_mdat <- data_frame(cell_id = colnames(umis),
      n_genes = n_genes)
    sc_obj <- add_metadata(sc_obj, out_mdat)
    }
}
sc_objs <- map(sc_objs, ~detected_molecules(.x))
subsampled_metadat <- map(sc_objs, ~.x$meta_dat) %>% 
  bind_rows(.id = "library") %>% 
   mutate(library = factor(library, 
                           levels = libs))

og_genes <- filter(subsampled_metadat, 
                   library == reflib) %>% 
  dplyr::select(cell_id, 
                og_genes = n_genes)

subsampled_metadat <- left_join(subsampled_metadat, 
                                og_genes,
                                by = "cell_id") %>% 
  filter(library != reflib)

ggplot(subsampled_metadat, aes(og_genes, 
                               n_genes, colour = subsampled)) + 
  geom_point() + 
  ylab("subsampled_genes") +
  xlab("original_genes") + 
  scale_colour_manual(name =  "subsampled:", values = color_palette) +
  facet_wrap(~library, nrow = 1) + 
  theme_cowplot(font_size = 16, line_size = 1) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 16)
  )

Parse out new versus previously identified genes

calc_gene_sensitivity <- function(sc_obj, 
                                  type = "umi"){
  
  if (type == "umi"){
    count_matrix <- sc_obj$umi_matrix
  } else {
    count_matrix <- sc_obj$read_matrix
  }
  # generate list named with barcode of each detected gene and 
  # respective read/umi count
  genes_detected <- apply(count_matrix, 2, function(x) x[x > 0])
  sc_obj$genes_detected <- genes_detected
  sc_obj
}

sc_objs <- map(sc_objs, calc_gene_sensitivity)
sc_objs <- map(sc_objs, 
           function(x){
             og_genes <- sc_objs[[reflib]]$genes_detected
             sub_genes <- x$genes_detected
             
             # subset list of cell barcodes to the same as the og experiment
             # and also reorders the barcodes to match
             sub_genes <- sub_genes[names(og_genes)]
             
             if(length(sub_genes) != length(og_genes)){
               stop("barcode lengths not the same")
             }
             shared_genes <- map2(sub_genes, 
                                  og_genes,
                                  ~intersect(names(.x),
                                             names(.y)))
             new_genes <- map2(sub_genes,
                               og_genes,
                               ~setdiff(names(.x),
                                        names(.y)))
             
             not_recovered_genes <- map2(og_genes,
                                         sub_genes,
                                         ~setdiff(names(.x),
                                                  names(.y)))
             x$shared_genes <- shared_genes
             x$new_genes <- new_genes
             x$not_recovered_genes <- not_recovered_genes
             return(x)
             })

## add gene recovery info to meta data table
sc_objs <- map(sc_objs, 
           function(x){
             shared_genes <- map2_dfr(x$shared_genes, 
                                names(x$shared_genes),
                                function(x, y){
                                  data_frame(cell_id = y,
                                             shared_genes = length(x))
                                 })
             
             not_recovered_genes <- map2_dfr(x$not_recovered_genes, 
                                names(x$not_recovered_genes),
                                function(x, y){
                                  data_frame(cell_id = y,
                                            not_recovered_genes = length(x))
                                 })
             
             new_genes <- map2_dfr(x$new_genes, 
                                names(x$new_genes),
                                function(x, y){
                                  data_frame(cell_id = y,
                                            new_genes = length(x))
                                 })
             gene_mdata <- left_join(shared_genes,
                                     not_recovered_genes,
                                     by = "cell_id") %>% 
               left_join(., new_genes, by = "cell_id")
             
             x <- add_metadata(x, gene_mdata)
             x
           })

subsampled_metadat <- map(sc_objs, ~.x$meta_dat) %>% 
  bind_rows(.id = "library") %>% 
   mutate(library = factor(library, 
                          levels = libs))
genes_recovered <- subsampled_metadat %>% 
  dplyr::filter(library != reflib) %>% 
  dplyr::select(cell_id, 
                library,
                subsampled,
                shared_genes,
                not_recovered_genes, 
                new_genes)

genes_recovered <- gather(genes_recovered, 
                          key = type, value = count, 
                          -cell_id, -subsampled, -library)
genes_recovered <- mutate(genes_recovered,
                          type = str_replace_all(type, "_", "\n"))

plt <- ggplot(genes_recovered, 
       aes(cell_id, count)) +
  geom_point(aes(color = subsampled),
             size = 0.6,
             alpha = 0.75) +
  facet_grid(type ~ library) +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text.y = element_text(size = 12,
                                    margin = margin(0,0.2,0,0.2, "cm"))) +
  scale_color_manual(values = color_palette)

plt 

save_plot("new_genes_detected.pdf", plt, base_width = 8, base_height = 8)

targeted <- genes_recovered %>% 
  dplyr::filter(library %in% subsampled_libs,
                subsampled) 
targeted
plt_dat <- genes_recovered %>% 
  dplyr::filter(!library %in% subsampled_libs,
                subsampled) %>% 
  group_by(type) %>% 
  summarize(count = mean(count)) %>% 
  mutate(cell_id = "Not Targeted Barcodes") %>% 
  bind_rows(targeted, .) %>% 
  filter(type == "new\ngenes")


plt <- ggplot(plt_dat, 
       aes(cell_id, count)) +
  geom_bar(aes(fill = cell_id),
           stat = "identity") +
  labs(y = "Newly detected genes") + 
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45,
                                   hjust = 1),
        legend.position = "none") +
  scale_fill_brewer(palette = "Set1")

plt

save_plot("new_genes_barplot.pdf", plt, 
          base_width = 3.6, base_height = 5)

Parse out new verses old UMIs

compare_umis <- function(path_to_ctrl,
                         path_to_test,
                         return_summary = F,
                         cells_exclude = "Cell_unmatched"){

  ## umi seqs should be produced by ./get_molecule_info
  ctrl_umi_seqs <- read_tsv(path_to_ctrl,
                   col_names = c("barcode_10x", 
                                 "umi_molecule", 
                                 "count")) %>% 
  filter(!barcode_10x %in% cells_exclude)

  test_umi_seqs <- read_tsv(path_to_test,
                   col_names = c("barcode_10x", 
                                 "umi_molecule", 
                                 "count")) %>% 
  filter(!barcode_10x %in% cells_exclude)

  umi_seqs <- full_join(ctrl_umi_seqs, 
          test_umi_seqs, 
          by = c("barcode_10x", "umi_molecule"))
  
  if (return_summary) {
    umi_seqs %>% 
      mutate(new_umi = ifelse(is.na(count.x) & !is.na(count.y), 
                          1L, 
                          0L),
         not_detected_umi = ifelse(!is.na(count.x) & is.na(count.y),
                                   1L,
                                   0L),
         shared_umi = ifelse(!is.na(count.x) & !is.na(count.y),
                             1L,
                             0L)) %>% 
      group_by(barcode_10x) %>% 
      summarize(new_umis = sum(new_umi),
            not_detected_umis = sum(not_detected_umi),
            shared_umis = sum(shared_umi))
  } else {
    umi_seqs
  }
}

umi_files <- file.path(data_dir, libs, "umis", "umigroups.txt.gz")

umi_summaries <- map(umi_files[2],
                   ~compare_umis(umi_files[1], .x, return_summary = T))

names(umi_summaries) <- umi_files[2] %>% 
  str_split(., "/") %>% 
  map_chr(~.x[7])

umi_summary <- bind_rows(umi_summaries, .id = "library")

umis_recovered <- umi_summary %>% 
  gather(class, count, -barcode_10x, -library) 

## annotate with subsampled or not
cell_annot <- data_frame(barcode_10x = cells,
                         library = libs,
                         subsampled = T) %>% 
  unnest()

umis_recovered <- umis_recovered %>% 
  mutate(subsampled = ifelse(str_replace(barcode_10x, "-1", "") %in% unlist(cells),
                             T,
                             F)) %>% 
  arrange(subsampled)

plt <- ggplot(umis_recovered, 
       aes(barcode_10x, count)) +
  geom_point(aes(colour = subsampled),
             size = 0.6,
             alpha = 0.75) +
  facet_grid(library ~ class) +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text.y = element_text(size = 12,
                                    margin = margin(0,0.2,0,0.2, "cm"))) +
  scale_color_manual(values = color_palette)

plt 

umi_seqs <- map(umi_files[2],
                ~compare_umis(umi_files[1], .x, return_summary = F))

names(umi_seqs) <- umi_files[2] %>% 
  str_split(., "/") %>% 
  map_chr(~.x[7])

new_umis <- map(umi_seqs, 
    ~filter(.x, 
       str_replace(barcode_10x, "-1", "") %in% unlist(cells), 
       !is.na(count.y), 
       is.na(count.x))  %>% 
  separate(umi_molecule, c("seq", "gene"), sep = "::") %>% 
    select(-starts_with("count"))) 

old_umis <- map(umi_seqs, 
    ~filter(.x, 
       str_replace(barcode_10x, "-1", "") %in% unlist(cells), 
       !is.na(count.x),
       !is.na(count.y))  %>% 
  separate(umi_molecule, c("seq", "gene"), sep = "::") %>% 
    select(-starts_with("count"))) 


umi_edit_dist <- map2(new_umis,
                      old_umis,
                     ~left_join(.x, .y, 
               by = c("barcode_10x", "gene")) %>% 
  na.omit() %>% 
  mutate(ed = kentr::get_hamming_pairs(seq.x, seq.y)) %>% 
  group_by(barcode_10x, seq.y, gene) %>% 
  summarize(min_ed = min(ed)) %>% 
  ungroup())

umi_edit_dist <- bind_rows(umi_edit_dist, 
                           .id = "library")

ggplot(umi_edit_dist, aes(barcode_10x, 
                          min_ed)) + 
  geom_boxplot(coef = Inf) +
  facet_wrap(~library, scales = "free_x") +
  labs(y = "Minimum edit distance\noriginal vs. new UMIs") +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1, 
                                   vjust = 0.5),
        axis.title.x = element_blank())

rm(umi_seqs)

tSNE analysis

original library tSNE

library(Seurat)

mat <- sc_objs[[reflib]]$umi_matrix
sobj <- CreateSeuratObject(mat)
sobj <- NormalizeData(sobj)
sobj <- ScaleData(sobj, vars.to.regress = "nUMI")
## [1] "Regressing out nUMI"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |=================================                                |  52%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |===================================                              |  55%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |================================================                 |  75%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  3.4817795475324 mins
## [1] "Scaling data matrix"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
sobj <- FindVariableGenes(sobj, do.plot = F)
sobj <- RunPCA(sobj, pc.genes = sobj@var.genes, pcs.compute = 20, do.print = F)
sobj <- RunTSNE(sobj, dims.use = 1:18, seed.use = 20180516)
sobj <- FindClusters(sobj, dims.use = 1:18, resolution = 0.6, 
                     print.output = F, 
                     random.seed = 20180516)

plt <- TSNEPlot(sobj, 
                colors.use = brewer.pal(12, "Paired"),
                do.label = T) + 
  labs(title = "PBMCs") +
  theme(legend.position = "none")

plt

save_plot("original_mkcells_tsne.pdf", plt, base_height = 4.25, base_width = 4.25)

original library tSNE supplemented with resampled barcodes

mat <- sc_objs[[reflib]]$umi_matrix

subsampled_ids <- sc_objs[[subsampled_libs]]$meta_dat %>% 
  filter(subsampled) %>% 
  pull(cell_id)

subsampled_mat <- sc_objs[[subsampled_libs]]$umi_matrix[, subsampled_ids]
colnames(subsampled_mat) <- str_c(colnames(subsampled_mat), "::", "subsampled")

mat <- as.data.frame(as.matrix(mat)) %>% rownames_to_column("cell")
subsampled_mat <- as.data.frame(as.matrix(subsampled_mat)) %>% rownames_to_column("cell")

combined_mats <- left_join(mat, subsampled_mat, by = c("cell")) 
combined_mats <- as.data.frame(combined_mats) %>% 
  column_to_rownames("cell") %>% 
  as.matrix() %>% 
  as(., "sparseMatrix")   

combined_mats[is.na(combined_mats)] <- 0

sobj <- CreateSeuratObject(combined_mats)

new_ids <- sobj@meta.data %>% 
  rownames_to_column("cell") %>% 
  mutate(subsampled = ifelse(str_detect(cell, "subsampled"),
                             "subsampled",
                             "not subsampled"))

subsampled_cell_ids <- new_ids[new_ids$subsampled == "subsampled", "cell"] %>% 
  str_replace("::subsampled", "")
 
new_ids <- mutate(new_ids, 
                  subsampled = ifelse(cell %in% subsampled_cell_ids, 
                                      "original cell",
                                      subsampled)) %>% 
  select(cell, subsampled) %>% 
  as.data.frame(.) %>% 
  column_to_rownames("cell")

sobj <- AddMetaData(sobj, new_ids)
sobj <- NormalizeData(sobj)
sobj <- ScaleData(sobj, vars.to.regress = "nUMI")
## [1] "Regressing out nUMI"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |=================================                                |  52%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |===================================                              |  55%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |================================================                 |  75%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  3.43020258347193 mins
## [1] "Scaling data matrix"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
sobj <- FindVariableGenes(sobj, do.plot = F)
sobj <- RunPCA(sobj, pc.genes = sobj@var.genes, pcs.compute = 20, do.print = F)
sobj <- RunTSNE(sobj, dims.use = 1:18, seed.use = 20180516)
sobj <- FindClusters(sobj, dims.use = 1:18, resolution = 0.6, 
                     print.output = F, 
                     random.seed = 20180516)

plt <- TSNEPlot(sobj, 
                colors.use = brewer.pal(12, "Paired"),
                do.label = T) + 
  labs(title = "PBMCs") +
  theme(legend.position = "none")

plt

save_plot("original_with_subsampled_mkcells_tsne.pdf", plt, base_height = 4.25, base_width = 4.25)
subsampled_cells <- TSNEPlot(SetAllIdent(sobj, 
                                         "subsampled"), 
                             colors.use = c("lightgrey",
                                            brewer.pal(7, "Set1")[1:2])) + 
  labs(title = "PBMC") +
  theme(legend.position = "bottom")

tmp <- subsampled_cells$data 

og_cell_dat <- tmp[cells[[subsampled_libs]], ] %>%
  rownames_to_column("cell")

subsampled_cell_dat <- tmp[str_c(cells[[subsampled_libs]], "::subsampled"), ] %>%
  rownames_to_column("cell") %>% 
  mutate(cell = str_replace(cell, "::subsampled", "")) %>% 
  select(cell, tSNE_1, tSNE_2)

cell_dat <- left_join(og_cell_dat, 
                      subsampled_cell_dat, 
                      by = "cell", 
                      suffix = c("", "_resampled"))

subsampled_cells <- TSNEPlot(SetAllIdent(sobj, 
                                         "subsampled"), 
                             colors.use = c("lightgrey",
                                            brewer.pal(7, "Set1")[1:2])) + 
                  geom_segment(data = cell_dat, 
                               aes(x = tSNE_1,
                                   y = tSNE_2, 
                                   xend = tSNE_1_resampled,
                                   yend = tSNE_2_resampled),
                               linejoin = "mitre",
                               arrow = arrow(length = unit(0.03, "npc"))) + 
  theme(legend.position = "bottom")

plt <- plot_grid(plt, 
                 subsampled_cells,
                 nrow = 2)

save_plot("resampled_tsne.pdf", plt, 
          nrow = 2, ncol = 1,
          base_height = 5.5, base_width = 4.25)

kNN analysis

Find the k-nearest neighbors in PCA space

## use combined data from above
data.use <- GetCellEmbeddings(object = sobj,
                              reduction.type = "pca",
                              dims.use = 1:20)

## find top 10 nearest neighboors using exact search
knn <- RANN::nn2(data.use, k = 5,
                 searchtype = 'standard',
                 eps = 0)

subsampled_idxs <- knn$nn.idx[str_detect(rownames(data.use), "::subsampled"), ]

nn_ids <- as_data_frame(t(apply(subsampled_idxs, 1,
                      function(x)rownames(data.use)[x])))

colnames(nn_ids) <- c("query_cell", paste0("nearest neighbor ", 1:(ncol(nn_ids) - 1)))

nn_ids